leaflet(GIS_Assignment)

Sites <- readOGR("./GIS_Assignment3.kml")
## OGR data source with driver: KML 
## Source: "C:\Users\ridew\OneDrive\Documents\Advanced Analytics\Graduate_Thesis_Analysis\Graduate_Thesis\GIS_Assignment3.kml", layer: "GIS_Assignment1"
## with 2 features
## It has 2 fields
leaflet(Sites) %>% 
  addTiles() %>% 
  addMarkers(popup = Sites@data$Name)%>%
  addProviderTiles(providers$CartoDB.Positron, group = "CartoDB") %>%
  addProviderTiles(providers$Esri.NatGeoWorldMap, group = "NatGeo") %>%
  addProviderTiles(providers$Esri.WorldImagery, group = "ESRI") %>%
  setView(lng = -87.2696542, lat = 36.4992402, zoom = 17)%>%
  addCircleMarkers(popup = Sites@data$Name,
                   label = Sites@data$Description,
                   group = "WBCS Points",
                   weight = 2,
                   color = "grey",
                   fillColor = "red",
                   fillOpacity = 0.7) %>%
  addLayersControl(
    baseGroups = c("OSM", "CartoDB", "NatGeo", "ESRI"),
    options = layersControlOptions(collapsed = FALSE),
    overlayGroups = "WBCS Points")
Figure 1. Southern Redbelly Dace (Chrosomus erythrogaster)

Spectrometry Terms:

1.) Hue: color or shade. It has reddish hues. Degree to which a stimulus can be described as similar or different from described stimuli. Dependent on the dominant wavelength, independent of intensity or lightness.

2.) Chroma: the vividness or dullness of a color; how close the color is to either gray or the pure hue (red). Chroma describes how pure, or monochromatic, a color is compared to a white surface with the same illumination

3.) Saturation: the degree of purity of a hue

4.) Spectral purity: quantification of the monochromaticity of a given light sample; stability of a signal; how clean a spectrum is compared to what it should be.

5.) Brightness: measures the ability to of a sample to reflect blue light; lower brightness values mean greater pigmentation; measures the reflection of a very specific wavelength of light.

6.) Spectral Whiteness: Measures the ability to reflect all colors of light….remember things like sand reflect light; measures the reflection of all wavelengths of light across the visible spectrum thus this measure is more in line without visual perception.

7.) A tint is created when any saturated hue on a spectrum is mixed with white to form a lighter color.

#1 The closer the Whiteness value is to 0, the more white tint and less red pigmentation the individual has. #2 The more sat, the more red pigmented the individual #3 The more chroma, the more red pigmented the individual

Let’s first look at our response variable: GSI_Value (Gonadosomatic Index Value).

#Gonadosomatic Index (GSI) = (Gonad Weight/Body Weight)*100

Thesis_Data1 <- read.csv("Thesis_Data1.csv")
na.omit(Thesis_Data1)
##    Individual    Season_Collected                        Site_Location Sat_Gill
## 1      SRBD05     Breeding_Season         Dry_Fork_Whites_creek_system    0.517
## 2      SRBD06     Breeding_Season         Dry_Fork_Whites_creek_system    0.568
## 3      SRBD07     Breeding_Season         Dry_Fork_Whites_creek_system    1.272
## 4      SRBD08     Breeding_Season         Dry_Fork_Whites_creek_system    2.501
## 5      SRBD09     Breeding_Season         Dry_Fork_Whites_creek_system    1.888
## 6     SRBD10      Breeding_Season         Dry_Fork_Whites_creek_system    1.859
## 7      SRBD11     Breeding_Season Rotary_Park_Wall_Branch_creek_system    0.433
## 8      SRBD12     Breeding_Season Rotary_Park_Wall_Branch_creek_system    0.923
## 9      SRBD13     Breeding_Season Rotary_Park_Wall_Branch_creek_system    1.107
## 10     SRBD14     Breeding_Season Rotary_Park_Wall_Branch_creek_system    1.341
## 11     SRBD15     Breeding_Season Rotary_Park_Wall_Branch_creek_system    1.037
## 12     SRBD16     Breeding_Season Rotary_Park_Wall_Branch_creek_system    0.494
## 13     SRBD17     Breeding_Season Rotary_Park_Wall_Branch_creek_system    1.210
## 14     SRBD18     Breeding_Season Rotary_Park_Wall_Branch_creek_system    1.387
## 15     SRBD19     Breeding_Season Rotary_Park_Wall_Branch_creek_system    0.887
## 16     SRBD20     Breeding_Season Rotary_Park_Wall_Branch_creek_system    0.775
## 17     SRBD21 Non-Breeding_Season         Dry_Fork_Whites_creek_system    0.813
## 18     SRBD22 Non-Breeding_Season         Dry_Fork_Whites_creek_system    0.911
## 19     SRBD23 Non-Breeding_Season         Dry_Fork_Whites_creek_system    0.842
## 20     SRBD24 Non-Breeding_Season         Dry_Fork_Whites_creek_system    1.047
## 21     SRBD25 Non-Breeding_Season         Dry_Fork_Whites_creek_system    0.535
## 22     SRBD26 Non-Breeding_Season         Dry_Fork_Whites_creek_system    1.614
## 23     SRBD27 Non-Breeding_Season         Dry_Fork_Whites_creek_system    0.867
## 24     SRBD28 Non-Breeding_Season         Dry_Fork_Whites_creek_system    0.389
## 25     SRBD29 Non-Breeding_Season         Dry_Fork_Whites_creek_system    1.274
## 26     SRBD30 Non-Breeding_Season Rotary_Park_Wall_Branch_creek_system    0.910
## 27     SRBD31 Non-Breeding_Season Rotary_Park_Wall_Branch_creek_system    0.849
## 28     SRBD32 Non-Breeding_Season Rotary_Park_Wall_Branch_creek_system    0.563
## 29     SRBD33 Non-Breeding_Season Rotary_Park_Wall_Branch_creek_system    1.002
## 30     SRBD34 Non-Breeding_Season Rotary_Park_Wall_Branch_creek_system    0.157
## 31     SRBD35 Non-Breeding_Season Rotary_Park_Wall_Branch_creek_system    0.154
##    DominantWavelength_Gill_nm Whiteness_Gill Chroma_Gill Tint_Gill
## 1                       589.6          -77.5      14.221    -46.99
## 2                       589.1          -92.7      13.766    -51.13
## 3                       594.4         -175.6      30.140   -126.48
## 4                       602.2         -257.1      60.131   -252.74
## 5                       599.7         -208.3      51.076   -191.95
## 6                       597.3         -229.1      48.925   -188.30
## 7                       593.9          -56.4       9.699    -42.19
## 8                       592.0         -135.8      24.398    -88.82
## 9                       600.4         -117.5      27.411   -113.26
## 10                      598.4         -150.5      38.099   -136.11
## 11                      595.3         -125.8      32.101   -103.38
## 12                      583.4         -101.1      12.953    -36.14
## 13                      593.3         -172.8      31.788   -119.05
## 14                      595.1         -185.8      33.710   -138.85
## 15                      594.6         -102.3      30.088    -87.73
## 16                      597.2          -87.6      21.200    -78.07
## 17                      597.7          -98.3      16.097    -82.24
## 18                      597.4         -113.0      16.032    -91.96
## 19                      594.0         -117.9      16.813    -82.68
## 20                      596.7         -133.0      20.488   -105.37
## 21                      594.0          -66.3      14.240    -52.27
## 22                      602.5         -166.1      32.850   -165.00
## 23                      596.8         -105.5      20.343    -87.18
## 24                      590.1          -58.8       8.797    -35.59
## 25                      596.8         -160.1      28.925   -128.45
## 26                      599.1         -104.4      18.298    -92.70
## 27                      591.9         -125.3      21.647    -81.39
## 28                      598.7          -62.8      12.270    -57.29
## 29                      597.5         -123.0      19.721   -101.20
## 30                      595.0          -18.2       2.945    -15.45
## 31                      584.2          -23.3       3.863    -11.39
##    Red_Coloration_Area Body_Condition_Factor GSI_Value Sex
## 1             2.350000             0.8422206  7.570329   F
## 2             2.969000             0.8905831  2.684388   F
## 3            18.237000             0.7797764  6.802955   F
## 4            26.563000             0.7813994  3.593015   M
## 5            23.845000             0.8039067  4.975702   F
## 6            14.425000             0.8668914  8.985812   F
## 7             8.504000             0.9369838 20.662427   F
## 8            12.299000             0.7448722 16.212114   F
## 9            19.260450             0.8632892  5.818025   F
## 10           22.011000             0.7961976 13.537048   F
## 11            5.320000             0.8560350 14.072611   F
## 12            9.542000             0.6476313  3.020408   M
## 13           10.331000             0.8373383  3.692622   M
## 14           15.288000             0.8511574  2.224640   M
## 15           13.199700             0.6588657  2.039452   M
## 16            2.503000             0.7463889  3.039325   M
## 17           27.893160             0.7726276  2.500496   F
## 18           19.517900             0.7682171  1.888853   M
## 19           22.125170             0.9247230  4.051327   F
## 20           24.001800             0.7673287  1.386120   M
## 21            3.478526             0.6991982  3.044118   M
## 22           24.074090             0.7318519  2.872874    
## 23            1.856426             0.7571918  5.120248   F
## 24            8.060000             0.7599881  4.341588   F
## 25           15.983910             0.8362804  3.174857    
## 26           12.001970             0.6950205  1.624142    
## 27           20.161590             0.6794739  1.403548    
## 28            4.524019             0.7571411  3.839178   F
## 29            7.151921             0.6615931  3.274809   F
## 30            4.888112             0.6878241  3.823114   F
## 31            4.850227             0.6608788  2.305263   F
Thesis_Data <- select(Thesis_Data1, Sat_Gill, DominantWavelength_Gill_nm, Whiteness_Gill, Chroma_Gill, Tint_Gill, Red_Coloration_Area, Body_Condition_Factor, GSI_Value)
View(Thesis_Data)
ggpairs(Thesis_Data, columns = 1:6) + theme_bw()

pairs(Thesis_Data[,1:6], lower.panel = NULL)

#Here Looking for low correlation between variables…so more of the shotgun pattern. The linear trends show high correlation between variables, which are better being thrown out.

correlation.matrix <- cor(Thesis_Data[,1:6])
round(correlation.matrix, 2)
##                            Sat_Gill DominantWavelength_Gill_nm Whiteness_Gill
## Sat_Gill                       1.00                       0.65          -0.96
## DominantWavelength_Gill_nm     0.65                       1.00          -0.49
## Whiteness_Gill                -0.96                      -0.49           1.00
## Chroma_Gill                    0.96                       0.56          -0.94
## Tint_Gill                     -1.00                      -0.68           0.95
## Red_Coloration_Area            0.64                       0.49          -0.61
##                            Chroma_Gill Tint_Gill Red_Coloration_Area
## Sat_Gill                          0.96     -1.00                0.64
## DominantWavelength_Gill_nm        0.56     -0.68                0.49
## Whiteness_Gill                   -0.94      0.95               -0.61
## Chroma_Gill                       1.00     -0.96                0.54
## Tint_Gill                        -0.96      1.00               -0.64
## Red_Coloration_Area               0.54     -0.64                1.00

Correlation between the variables. Anything above ~0.5 we should look at.

options(na.action = “na.fail”)

options(width = 90)

Now running lm with additive effects of the variables in relation to GSI_Value.

model_GSI <- lm(GSI_Value ~ Sat_Gill + DominantWavelength_Gill_nm + Whiteness_Gill + Chroma_Gill + Tint_Gill + Red_Coloration_Area, data=Thesis_Data)
anova (model_GSI) 
## Analysis of Variance Table
## 
## Response: GSI_Value
##                            Df Sum Sq Mean Sq F value  Pr(>F)  
## Sat_Gill                    1   0.05   0.051  0.0022 0.96308  
## DominantWavelength_Gill_nm  1   0.67   0.668  0.0288 0.86674  
## Whiteness_Gill              1   0.58   0.578  0.0249 0.87589  
## Chroma_Gill                 1 102.24 102.238  4.4041 0.04656 *
## Tint_Gill                   1   1.80   1.801  0.0776 0.78300  
## Red_Coloration_Area         1   0.37   0.371  0.0160 0.90042  
## Residuals                  24 557.15  23.214                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
performance::check_collinearity(model_GSI)
## # Check for Multicollinearity
## 
## Low Correlation
## 
##                 Term  VIF Increased SE Tolerance
##  Red_Coloration_Area 1.99         1.41      0.50
## 
## Moderate Correlation
## 
##                        Term  VIF Increased SE Tolerance
##  DominantWavelength_Gill_nm 9.24         3.04      0.11
## 
## High Correlation
## 
##            Term     VIF Increased SE Tolerance
##        Sat_Gill 1866.77        43.21      0.00
##  Whiteness_Gill   27.63         5.26      0.04
##     Chroma_Gill   18.72         4.33      0.05
##       Tint_Gill 1875.00        43.30      0.00

Red Coloration Area has a VIF <5 showing low correlation with other predictors variables, so best chances for finding any correlation to GSI_Value.

Dredge out models from the previous linear model for EXPLORATORY PURPOSE ONLY.

options(na.action = "na.fail") # otherwise blows up with NA values
dredge_GSI<-dredge(model_GSI)
## Fixed term is "(Intercept)"
dredge_GSI
## Global model call: lm(formula = GSI_Value ~ Sat_Gill + DominantWavelength_Gill_nm + 
##     Whiteness_Gill + Chroma_Gill + Tint_Gill + Red_Coloration_Area, 
##     data = Thesis_Data)
## ---
## Model selection table 
##       (Int) Chr_Gll DmW_Gll_nm Red_Clr_Are   Sat_Gll    Tnt_Gll   Wht_Gll df
## 1     5.277                                                                2
## 10    5.502 0.48520                        -12.04000                       4
## 18    4.964 0.43120                                   0.1024000            4
## 5     6.171                       -0.06810                                 3
## 2     4.385 0.03770                                                        3
## 33    5.071                                                     -0.001698  3
## 3    16.760          -0.019290                                             3
## 17    5.199                                          -0.0008048            3
## 9     5.199                                  0.07970                       3
## 26    6.279 0.51750                        -31.22000 -0.1772000            5
## 12  -78.110 0.53030   0.141800             -13.97000                       5
## 34    5.962 0.24790                                              0.054090  4
## 20 -134.900 0.52720   0.237100                        0.1390000            5
## 6     5.147 0.08507               -0.14330                                 4
## 14    5.554 0.47290               -0.01848 -11.54000                       5
## 42    5.462 0.48440                        -12.22000            -0.001926  5
## 50    5.337 0.44790                                   0.0917500  0.014870  5
## 22    5.095 0.41170               -0.03308            0.0945100            5
## 37    5.337                       -0.12070                      -0.012610  4
## 21    5.645                       -0.12230           -0.0128400            4
## 13    5.593                       -0.12140   1.31500                       4
## 4    74.910 0.06008  -0.119400                                             4
## 7   -26.080           0.054510    -0.08272                                 4
## 41    4.925                                 -1.32800            -0.013570  4
## 49    4.934                                           0.0094060 -0.010340  4
## 35   27.930          -0.038730                                  -0.003221  4
## 19   33.830          -0.048560                       -0.0035850            4
## 11   31.000          -0.043760               0.32710                       4
## 25    5.262                                 -1.44700 -0.0147200            4
## 38    6.123 0.23140               -0.10020                       0.041330  5
## 36   98.340 0.29100  -0.156200                                   0.057650  5
## 58    6.116 0.52920                        -52.75000 -0.3394000 -0.042670  6
## 44 -161.400 0.55620   0.281600             -19.94000            -0.043760  6
## 52 -214.900 0.54480   0.371400                        0.1832000 -0.032690  6
## 30    6.326 0.50580               -0.01739 -30.70000 -0.1767000            6
## 28  -29.010 0.52490   0.059400             -25.17000 -0.1139000            6
## 8    35.840 0.09233  -0.052020    -0.13580                                 5
## 16  -77.960 0.51810   0.141700    -0.01811 -13.48000                       6
## 24 -132.300 0.51250   0.232800    -0.02187            0.1331000            6
## 46    5.512 0.47200               -0.01857 -11.73000            -0.002060  6
## 54    5.427 0.42950               -0.02918            0.0855000  0.013820  6
## 39   -4.967           0.017460    -0.12360                      -0.012180  5
## 53    5.378                       -0.12230           -0.0025840 -0.010380  5
## 45    5.338                       -0.12080   0.01318            -0.012490  5
## 23   22.380          -0.028390    -0.12100           -0.0143300            5
## 29    5.756                       -0.12270  -2.51200 -0.0370300            5
## 15   15.330          -0.016530    -0.12030   1.39800                       5
## 27  152.800          -0.248200             -28.48000 -0.2890000            5
## 57    5.139                                -15.32000 -0.1201000 -0.028370  5
## 43    8.496          -0.006025              -1.19700            -0.012640  5
## 51   16.820          -0.020040                        0.0054320 -0.007474  5
## 40   71.150 0.26530  -0.110000    -0.07883                       0.046560  6
## 60  -85.000 0.54960   0.153300             -40.06000 -0.1979000 -0.048460  7
## 62    6.166 0.51680               -0.01843 -52.26000 -0.3393000 -0.042800  7
## 48 -161.700 0.54300   0.282200    -0.01985 -19.44000            -0.043990  7
## 56 -213.300 0.52850   0.368800    -0.02470            0.1773000 -0.033250  7
## 32  -29.560 0.51310   0.060410    -0.01763 -24.54000 -0.1123000            7
## 61    5.628                       -0.12510 -17.93000 -0.1545000 -0.031490  6
## 31  122.100          -0.195900    -0.11700 -23.80000 -0.2524000            6
## 47  -18.180           0.039690    -0.12310  -0.82200            -0.018610  6
## 55   -9.764           0.025530    -0.12350            0.0023620 -0.014020  6
## 59  132.200          -0.213700             -35.03000 -0.3291000 -0.021070  6
## 64  -85.810 0.53690   0.154800    -0.01919 -39.43000 -0.1965000 -0.048650  8
## 63   95.640          -0.151400    -0.12030 -31.80000 -0.3013000 -0.026200  7
##     logLik  AICc delta weight
## 1  -91.457 187.3  0.00  0.127
## 10 -89.179 187.9  0.55  0.096
## 18 -89.457 188.5  1.11  0.073
## 5  -91.237 189.4  2.02  0.046
## 2  -91.277 189.4  2.10  0.044
## 33 -91.451 189.8  2.45  0.037
## 3  -91.452 189.8  2.45  0.037
## 17 -91.456 189.8  2.46  0.037
## 9  -91.456 189.8  2.46  0.037
## 26 -88.999 190.4  3.06  0.028
## 12 -89.009 190.4  3.08  0.027
## 34 -90.450 190.4  3.10  0.027
## 20 -89.077 190.6  3.21  0.025
## 6  -90.571 190.7  3.34  0.024
## 14 -89.170 190.7  3.40  0.023
## 42 -89.178 190.8  3.41  0.023
## 50 -89.410 191.2  3.88  0.018
## 22 -89.428 191.3  3.91  0.018
## 37 -91.018 191.6  4.23  0.015
## 21 -91.039 191.6  4.27  0.015
## 13 -91.043 191.6  4.28  0.015
## 4  -91.137 191.8  4.47  0.014
## 7  -91.205 191.9  4.61  0.013
## 41 -91.427 192.4  5.05  0.010
## 49 -91.434 192.4  5.06  0.010
## 35 -91.434 192.4  5.07  0.010
## 19 -91.438 192.4  5.07  0.010
## 11 -91.440 192.4  5.08  0.010
## 25 -91.455 192.4  5.11  0.010
## 38 -90.138 192.7  5.33  0.009
## 36 -90.199 192.8  5.46  0.008
## 58 -88.817 193.1  5.79  0.007
## 44 -88.824 193.1  5.81  0.007
## 52 -88.965 193.4  6.09  0.006
## 30 -88.991 193.5  6.14  0.006
## 28 -88.992 193.5  6.14  0.006
## 8  -90.545 193.5  6.15  0.006
## 16 -89.000 193.5  6.16  0.006
## 24 -89.064 193.6  6.29  0.005
## 46 -89.169 193.8  6.50  0.005
## 54 -89.387 194.3  6.93  0.004
## 39 -91.015 194.4  7.09  0.004
## 53 -91.017 194.4  7.09  0.004
## 45 -91.018 194.4  7.09  0.004
## 23 -91.033 194.5  7.12  0.004
## 29 -91.036 194.5  7.13  0.004
## 15 -91.041 194.5  7.14  0.004
## 27 -91.344 195.1  7.75  0.003
## 57 -91.386 195.2  7.83  0.003
## 43 -91.427 195.3  7.91  0.002
## 51 -91.433 195.3  7.92  0.002
## 40 -90.026 195.6  8.21  0.002
## 60 -88.774 196.4  9.08  0.001
## 62 -88.808 196.5  9.14  0.001
## 48 -88.813 196.5  9.15  0.001
## 56 -88.948 196.8  9.42  0.001
## 32 -88.984 196.8  9.49  0.001
## 61 -90.949 197.4 10.06  0.001
## 31 -90.967 197.4 10.09  0.001
## 47 -91.011 197.5 10.18  0.001
## 55 -91.015 197.5 10.19  0.001
## 59 -91.308 198.1 10.77  0.001
## 64 -88.764 200.1 12.73  0.000
## 63 -90.910 200.7 13.35  0.000
## Models ranked by AICc(x)

Here we see there are 64 possible models based on additive combinations of the variables.

Let’s dredge models for a delta <2/ within two AICc units.

subset(dredge_GSI, delta <2)
## Global model call: lm(formula = GSI_Value ~ Sat_Gill + DominantWavelength_Gill_nm + 
##     Whiteness_Gill + Chroma_Gill + Tint_Gill + Red_Coloration_Area, 
##     data = Thesis_Data)
## ---
## Model selection table 
##    (Int) Chr_Gll Sat_Gll Tnt_Gll df  logLik  AICc delta weight
## 1  5.277                          2 -91.457 187.3  0.00  0.429
## 10 5.502  0.4852  -12.04          4 -89.179 187.9  0.55  0.325
## 18 4.964  0.4312          0.1024  4 -89.457 188.5  1.11  0.246
## Models ranked by AICc(x)

The dredged models include model 1, 10, and 18. Notice model 1 includes only the intercept and none of the predictor variables. Model 10 includes the intercept (slope) Chroma_gill and Saturation_gill. Lastly, model 18 included both Chroma_Gill and Tint_Gill.

Now let’s rank variables support in the models above. This calculates variable importance weights.

importance(dredge_GSI)
##                      Chroma_Gill Sat_Gill Tint_Gill Red_Coloration_Area
## Sum of weights:      0.52        0.34     0.31      0.24               
## N containing models:   32          32       32        32               
##                      Whiteness_Gill DominantWavelength_Gill_nm
## Sum of weights:      0.23           0.22                      
## N containing models:   32             32

The results of importance show that each of the six variables show up 32 times in the models with Chroma_Gill with the most weight. And the other variables show to be close in weights.

Based on the effects of variables in the models using subset(dredge_GSI, delta <2) and some added models, I ran the following lm:

model10 <- lm(GSI_Value~Chroma_Gill + Sat_Gill, data = Thesis_Data)
model18 <- lm(GSI_Value~Chroma_Gill + Tint_Gill, data = Thesis_Data)
model10a <- lm(GSI_Value~Sat_Gill, data = Thesis_Data)
model10b <- lm(GSI_Value~Chroma_Gill, data = Thesis_Data)
model18a <-lm(GSI_Value~Chroma_Gill, data = Thesis_Data)
model18b <-lm(GSI_Value~Tint_Gill, data = Thesis_Data)
model19 <- lm(GSI_Value~Red_Coloration_Area, data = Thesis_Data)
out.put1<-model.sel(model10, model18, model10a, model10b, model18a, model18b, model19)
out.put1
## Model selection table 
##          (Int) Chr_Gll  Sat_Gll    Tnt_Gll Red_Clr_Are             family df
## model10  5.502  0.4852 -12.0400                        gaussian(identity)  4
## model18  4.964  0.4312           0.1024000             gaussian(identity)  4
## model19  6.171                                 -0.0681 gaussian(identity)  3
## model10b 4.385  0.0377                                 gaussian(identity)  3
## model18a 4.385  0.0377                                 gaussian(identity)  3
## model18b 5.199                  -0.0008048             gaussian(identity)  3
## model10a 5.199           0.0797                        gaussian(identity)  3
##           logLik  AICc delta weight
## model10  -89.179 187.9  0.00  0.254
## model18  -89.457 188.5  0.56  0.193
## model19  -91.237 189.4  1.47  0.122
## model10b -91.277 189.4  1.55  0.117
## model18a -91.277 189.4  1.55  0.117
## model18b -91.456 189.8  1.90  0.098
## model10a -91.456 189.8  1.90  0.098
## Models ranked by AICc(x)

Low delta <2 for both. Model 10 which includes both Chroma_Gill and Saturation_Gill is about 1.32 times more likely than model 18 which contains Chroma_Gill and Tint_Gill. Model 10 is also about 2.08 times more likely than model19.

importance(out.put1)
##                      Chroma_Gill Sat_Gill Tint_Gill Red_Coloration_Area
## Sum of weights:      0.68        0.35     0.29      0.12               
## N containing models:    4           2        2         1

Here are the importance values of each of the variables used in the model selection.Chroma_Gill carrying most of that weight.

Now to average the models using the lm from out.put1:

model.avg(out.put1, revised.var = TRUE)
## 
## Call:
## model.avg(object = out.put1, revised.var = TRUE)
## 
## Component models: 
## '12' '13' '4'  '1a' '1b' '3'  '2' 
## 
## Coefficients: 
##        (Intercept) Chroma_Gill  Sat_Gill  Tint_Gill Red_Coloration_Area
## full      5.158668   0.2152772 -3.053739 0.01964330        -0.008315297
## subset    5.158668   0.3158742 -8.663425 0.06756885        -0.068095538
summary(model.avg(out.put1))
## 
## Call:
## model.avg(object = out.put1)
## 
## Component model call: 
## lm(formula = <6 unique values>, data = Thesis_Data)
## 
## Component models: 
##    df logLik   AICc delta weight
## 12  4 -89.18 187.90  0.00   0.25
## 13  4 -89.46 188.45  0.56   0.19
## 4   3 -91.24 189.36  1.47   0.12
## 1a  3 -91.28 189.44  1.55   0.12
## 1b  3 -91.28 189.44  1.55   0.12
## 3   3 -91.46 189.80  1.90   0.10
## 2   3 -91.46 189.80  1.90   0.10
## 
## Term codes: 
##         Chroma_Gill            Sat_Gill           Tint_Gill Red_Coloration_Area 
##                   1                   2                   3                   4 
## 
## Model-averaged coefficients:  
## (full average) 
##                      Estimate Std. Error Adjusted SE z value Pr(>|z|)   
## (Intercept)          5.158668   1.831655    1.905478   2.707  0.00678 **
## Chroma_Gill          0.215277   0.271057    0.275073   0.783  0.43385   
## Sat_Gill            -3.053739   6.073988    6.144613   0.497  0.61920   
## Tint_Gill            0.019643   0.047325    0.047911   0.410  0.68181   
## Red_Coloration_Area -0.008315   0.043207    0.044594   0.186  0.85208   
##  
## (conditional average) 
##                     Estimate Std. Error Adjusted SE z value Pr(>|z|)   
## (Intercept)          5.15867    1.83165     1.90548   2.707  0.00678 **
## Chroma_Gill          0.31587    0.27573     0.28151   1.122  0.26183   
## Sat_Gill            -8.66343    7.48778     7.64951   1.133  0.25740   
## Tint_Gill            0.06757    0.06683     0.06825   0.990  0.32214   
## Red_Coloration_Area -0.06810    0.10591     0.11052   0.616  0.53780   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Term Codes for the variables: 1 = Chroma_Gill, 2 = Sat_Gill, and 3 = Tint_Gill, and 4 = Red Coloration Area Look at component models: AICc values for model 10 and model 18 are very close to each other, providing support for both.

You can see that both the full average and conditional average outputs are different, notice differences in P-values and the contributions of each of the variable’s effect under the Estimate column. From the estimate column, we see that in both the full and conditional Saturation_Gill has a negative effect on GSI_Value and Chroma_Gill and Tint_Gill has a positive effect on GSI_value.

model10 <- lm(GSI_Value~Chroma_Gill + Sat_Gill, data = Thesis_Data)
model18 <- lm(GSI_Value~Chroma_Gill + Tint_Gill, data = Thesis_Data)
anova(model18)
## Analysis of Variance Table
## 
## Response: GSI_Value
##             Df Sum Sq Mean Sq F value Pr(>F)  
## Chroma_Gill  1   7.63   7.634  0.3669 0.5496  
## Tint_Gill    1  72.59  72.588  3.4884 0.0723 .
## Residuals   28 582.63  20.808                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model18)
## 
## Call:
## lm(formula = GSI_Value ~ Chroma_Gill + Tint_Gill, data = Thesis_Data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.9132 -2.7457 -0.8285  0.6398 15.8374 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  4.96423    1.71268   2.899  0.00721 **
## Chroma_Gill  0.43124    0.21971   1.963  0.05967 . 
## Tint_Gill    0.10244    0.05485   1.868  0.07230 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.562 on 28 degrees of freedom
## Multiple R-squared:  0.121,  Adjusted R-squared:  0.05824 
## F-statistic: 1.928 on 2 and 28 DF,  p-value: 0.1643
anova(model10)
## Analysis of Variance Table
## 
## Response: GSI_Value
##             Df Sum Sq Mean Sq F value  Pr(>F)  
## Chroma_Gill  1   7.63   7.634  0.3735 0.54602  
## Sat_Gill     1  82.96  82.956  4.0589 0.05363 .
## Residuals   28 572.26  20.438                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model10)
## 
## Call:
## lm(formula = GSI_Value ~ Chroma_Gill + Sat_Gill, data = Thesis_Data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.3841 -2.6634 -0.9783  0.6944 15.6669 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   5.5019     1.7589   3.128  0.00408 **
## Chroma_Gill   0.4852     0.2305   2.105  0.04442 * 
## Sat_Gill    -12.0386     5.9755  -2.015  0.05363 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.521 on 28 degrees of freedom
## Multiple R-squared:  0.1367, Adjusted R-squared:  0.075 
## F-statistic: 2.216 on 2 and 28 DF,  p-value: 0.1278
out.put1<-model.sel(model10, model18)
out.put1
## Model selection table 
##         (Int) Chr_Gll Sat_Gll Tnt_Gll             family df  logLik  AICc delta
## model10 5.502  0.4852  -12.04         gaussian(identity)  4 -89.179 187.9  0.00
## model18 4.964  0.4312          0.1024 gaussian(identity)  4 -89.457 188.5  0.56
##         weight
## model10  0.569
## model18  0.431
## Models ranked by AICc(x)

Low delta <2 for both. Model 10 which includes both Chroma_Gill and Saturation_Gill is about 1.32 times more likely than model 18 which contains Chroma_Gill and Tint_Gill.

importance(out.put1)
##                      Chroma_Gill Sat_Gill Tint_Gill
## Sum of weights:      1.00        0.57     0.43     
## N containing models:    2           1        1

Here are the importance values of each of the variables used in the model selection.Chroma_Gill carrying most of that weight.

Now to average the models using the lm from out.put1:

model.avg(out.put1, revised.var = TRUE)
## 
## Call:
## model.avg(object = out.put1, revised.var = TRUE)
## 
## Component models: 
## '12' '13'
## 
## Coefficients: 
##        (Intercept) Chroma_Gill   Sat_Gill  Tint_Gill
## full      5.270226    0.461975  -6.851555 0.04413711
## subset    5.270226    0.461975 -12.038597 0.10243775
summary(model.avg(out.put1))
## 
## Call:
## model.avg(object = out.put1)
## 
## Component model call: 
## lm(formula = <2 unique values>, data = Thesis_Data)
## 
## Component models: 
##    df logLik   AICc delta weight
## 12  4 -89.18 187.90  0.00   0.57
## 13  4 -89.46 188.45  0.56   0.43
## 
## Term codes: 
## Chroma_Gill    Sat_Gill   Tint_Gill 
##           1           2           3 
## 
## Model-averaged coefficients:  
## (full average) 
##             Estimate Std. Error Adjusted SE z value Pr(>|z|)   
## (Intercept)  5.27023    1.75942     1.83703   2.869  0.00412 **
## Chroma_Gill  0.46198    0.22752     0.23764   1.944  0.05190 . 
## Sat_Gill    -6.85155    7.47401     7.59843   0.902  0.36721   
## Tint_Gill    0.04414    0.06220     0.06316   0.699  0.48465   
##  
## (conditional average) 
##              Estimate Std. Error Adjusted SE z value Pr(>|z|)   
## (Intercept)   5.27023    1.75942     1.83703   2.869  0.00412 **
## Chroma_Gill   0.46198    0.22752     0.23764   1.944  0.05190 . 
## Sat_Gill    -12.03860    5.97545     6.24510   1.928  0.05389 . 
## Tint_Gill     0.10244    0.05485     0.05732   1.787  0.07392 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Notice the very close P-values!!

w10 <- ggplot(Thesis_Data, aes(Chroma_Gill + Sat_Gill, GSI_Value)) + 
  geom_point() +
  geom_smooth(method="lm")

w18 <- ggplot(Thesis_Data, aes(Chroma_Gill + Tint_Gill, GSI_Value)) + 
  geom_point() +
  geom_smooth(method="lm")

w10a <-  ggplot(Thesis_Data, aes(Chroma_Gill, GSI_Value)) + 
  geom_point() +
  geom_smooth(method="lm")

w10b <-  ggplot(Thesis_Data, aes(Sat_Gill, GSI_Value)) + 
  geom_point() +
  geom_smooth(method="lm")


w18a <- ggplot(Thesis_Data, aes(Chroma_Gill, GSI_Value)) + 
  geom_point() +
  geom_smooth(method="lm")


w18b <- ggplot(Thesis_Data, aes(Tint_Gill, GSI_Value)) + 
  geom_point() +
  geom_smooth(method="lm")

w19 <- ggplot(Thesis_Data, aes(Red_Coloration_Area, GSI_Value)) + 
  geom_point() +
  geom_smooth(method="lm")


 w10 / w18 / w10a / w10b / w18a / w18b / w19
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

p1c <- ggplot(Thesis_Data, aes(Chroma_Gill + Sat_Gill, GSI_Value)) + 
  geom_point(size=3) +
  geom_smooth(method="lm")

p1c
## `geom_smooth()` using formula 'y ~ x'

p1c <- ggplot(Thesis_Data, aes(Chroma_Gill + Tint_Gill, GSI_Value)) + 
  geom_point(size=3) +
  geom_smooth(method="lm")

p1c
## `geom_smooth()` using formula 'y ~ x'

wchroma <- ggplot(Thesis_Data, aes(Chroma_Gill, GSI_Value)) + 
  geom_point() +
  geom_smooth(method="lm")


wtint <- ggplot(Thesis_Data, aes(Tint_Gill, GSI_Value)) + 
  geom_point() +
  geom_smooth(method="lm")

wsat <- ggplot(Thesis_Data, aes(Sat_Gill, GSI_Value)) + 
  geom_point() +
  geom_smooth(method="lm")

wchroma / wtint / wsat
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

Now to look at our second response variable: Body Condition Factor

#Body Condition Factor (BCF) = ((body weight(g) - gut content(g)) / length(cm)^3) *100.

Figure 4. Southern Redbelly Dace (Chrosomus erythrogaster) Body Condition Measurments

options(na.action = “na.fail”)

options(width = 90)

Now running lm with additive effects of the variables in relation to GSI_Value.

model_Body_Condition <- lm(Body_Condition_Factor ~ Sat_Gill + DominantWavelength_Gill_nm + Whiteness_Gill + Chroma_Gill + Tint_Gill + Red_Coloration_Area, data=Thesis_Data)
anova (model_Body_Condition) 
## Analysis of Variance Table
## 
## Response: Body_Condition_Factor
##                            Df   Sum Sq   Mean Sq F value Pr(>F)
## Sat_Gill                    1 0.011577 0.0115768  1.7355 0.2001
## DominantWavelength_Gill_nm  1 0.000004 0.0000043  0.0007 0.9799
## Whiteness_Gill              1 0.012668 0.0126683  1.8992 0.1809
## Chroma_Gill                 1 0.001569 0.0015695  0.2353 0.6320
## Tint_Gill                   1 0.007074 0.0070743  1.0605 0.3134
## Red_Coloration_Area         1 0.000122 0.0001215  0.0182 0.8938
## Residuals                  24 0.160091 0.0066704
performance::check_collinearity(model_Body_Condition)
## # Check for Multicollinearity
## 
## Low Correlation
## 
##                 Term  VIF Increased SE Tolerance
##       Whiteness_Gill 4.10         2.02      0.24
##  Red_Coloration_Area 1.11         1.05      0.90
## 
## High Correlation
## 
##                        Term    VIF Increased SE Tolerance
##                    Sat_Gill 291.67        17.08      0.00
##  DominantWavelength_Gill_nm  37.87         6.15      0.03
##                 Chroma_Gill  11.43         3.38      0.09
##                   Tint_Gill 370.50        19.25      0.00

Whiteness_Gill and Red Coloration Area has a VIF <5 showing low correlation with other predictors variables, so best chances for finding any correlation to Body Condition Factor. Sat_Gill, DominantWavelength_Gill, Chroma_Gill, and Tint_Gill show high correlation.

Dredge out models from the previous linear model for EXPLORATORY PURPOSE ONLY.

options(na.action = "na.fail") # otherwise blows up with NA values
dredge_Body_Condition <-dredge(model_Body_Condition)
## Fixed term is "(Intercept)"
dredge_Body_Condition
## Global model call: lm(formula = Body_Condition_Factor ~ Sat_Gill + DominantWavelength_Gill_nm + 
##     Whiteness_Gill + Chroma_Gill + Tint_Gill + Red_Coloration_Area, 
##     data = Thesis_Data)
## ---
## Model selection table 
##       (Int)    Chr_Gll DmW_Gll_nm Red_Clr_Are  Sat_Gll    Tnt_Gll    Wht_Gll df
## 33  0.72640                                                       -0.0004120  3
## 1   0.77620                                                                   2
## 17  0.74040                                            -0.0003712             3
## 2   0.74130  0.0014750                                                        3
## 9   0.73920                                    0.03805                        3
## 3  -0.95600             2.911e-03                                             3
## 5   0.75950                         1.277e-03                                 3
## 41  0.71920                                   -0.06452            -0.0009889  4
## 49  0.72070                                             0.0003868 -0.0007673  4
## 34  0.72280 -0.0010010                                            -0.0006373  4
## 37  0.72790                        -7.104e-04                     -0.0004762  4
## 35  0.39810             5.560e-04                                 -0.0003902  4
## 57  0.73790                                   -1.28500 -0.0104800 -0.0022800  5
## 25  0.74780                                   -0.17020 -0.0020080             4
## 21  0.74220                        -4.967e-04          -0.0004201             4
## 18  0.74010  0.0006157                                 -0.0002238             4
## 19  0.87340            -2.256e-04                      -0.0003841             4
## 4   0.35280  0.0013520  6.577e-04                                             4
## 10  0.73980  0.0008515                         0.01678                        4
## 13  0.74070                        -4.511e-04  0.04264                        4
## 6   0.74150  0.0014880             -3.916e-05                                 4
## 11  0.67350             1.116e-04              0.03742                        4
## 43 -2.65700             5.696e-03             -0.18810            -0.0018700  5
## 7  -0.61870             2.330e-03   6.514e-04                                 4
## 51 -2.79800             5.932e-03                       0.0015630 -0.0016140  5
## 45  0.72060                        -4.011e-04 -0.06006            -0.0009854  5
## 42  0.71970  0.0004398                        -0.07440            -0.0009784  5
## 53  0.72250                        -4.987e-04           0.0003379 -0.0007674  5
## 38  0.72410 -0.0011340             -8.106e-04                     -0.0007405  5
## 50  0.72050 -0.0002624                                  0.0003386 -0.0007820  5
## 36  0.08378 -0.0012990  1.081e-03                                 -0.0006619  5
## 39  0.16660             9.514e-04  -8.697e-04                     -0.0004532  5
## 58  0.74140  0.0019260                        -1.42200 -0.0112800 -0.0023330  6
## 27  4.31300            -5.999e-03             -0.82370 -0.0086370             5
## 61  0.74060                        -7.018e-04 -1.30000 -0.0106700 -0.0022980  6
## 59  2.16200            -2.396e-03             -1.50600 -0.0128200 -0.0021990  6
## 26  0.75040  0.0012920                        -0.24450 -0.0024140             5
## 29  0.74990                        -5.235e-04 -0.17470 -0.0021040             5
## 22  0.74170  0.0003697             -4.166e-04          -0.0003237             5
## 23  0.82700            -1.439e-04  -4.902e-04          -0.0004277             5
## 20  0.65850  0.0006717  1.384e-04                      -0.0002025             5
## 14  0.74070  0.0006437             -3.110e-04  0.02514                        5
## 12  0.46820  0.0009978  4.608e-04              0.01052                        5
## 8   0.31110  0.0013870  7.297e-04  -1.450e-04                                 5
## 15  0.61280             2.170e-04  -4.656e-04  0.04156                        5
## 44 -3.32300  0.0021790  6.823e-03             -0.26150            -0.0019920  6
## 47 -2.82000             5.976e-03  -7.541e-04 -0.18580            -0.0019070  6
## 52 -3.47100  0.0015810  7.068e-03                       0.0020790 -0.0016870  6
## 55 -2.96800             6.222e-03  -7.853e-04           0.0015430 -0.0016560  6
## 46  0.72070  0.0001996             -3.579e-04 -0.06503            -0.0009810  6
## 40 -0.31380 -0.0016750  1.756e-03  -1.152e-03                     -0.0008240  6
## 54  0.72240 -0.0006682             -6.436e-04           0.0002009 -0.0008051  6
## 31  4.22100            -5.841e-03  -3.538e-04 -0.80950 -0.0085270             6
## 28  4.10700  0.0005963 -5.650e-03             -0.81990 -0.0084380             6
## 62  0.74240  0.0016890             -3.531e-04 -1.41200 -0.0112800 -0.0023350  7
## 60  1.46500  0.0017640 -1.218e-03             -1.52200 -0.0124000 -0.0022870  7
## 63  1.96900            -2.067e-03  -6.361e-04 -1.48900 -0.0126800 -0.0022260  7
## 30  0.75120  0.0010920             -2.962e-04 -0.23560 -0.0024050             6
## 24  0.70800  0.0003945  5.718e-05  -4.139e-04          -0.0003142             6
## 16  0.47060  0.0007900  4.582e-04  -3.098e-04  0.01888                        6
## 48 -3.32800  0.0019210  6.834e-03  -3.888e-04 -0.25170            -0.0019960  7
## 56 -3.43500  0.0012140  7.010e-03  -5.585e-04           0.0019450 -0.0017000  7
## 32  4.09800  0.0004139 -5.634e-03  -2.737e-04 -0.81010 -0.0084140             7
## 64  1.45100  0.0015340 -1.192e-03  -3.472e-04 -1.51100 -0.0123800 -0.0022900  8
##    logLik  AICc delta weight
## 33 36.048 -65.2  0.00  0.100
## 1  34.730 -65.0  0.18  0.091
## 17 35.714 -64.5  0.67  0.071
## 2  35.698 -64.5  0.70  0.070
## 9  35.688 -64.5  0.72  0.069
## 3  35.146 -63.4  1.80  0.040
## 5  34.995 -63.1  2.11  0.035
## 41 36.261 -63.0  2.22  0.033
## 49 36.153 -62.8  2.44  0.029
## 34 36.107 -62.7  2.53  0.028
## 37 36.103 -62.7  2.54  0.028
## 35 36.061 -62.6  2.62  0.027
## 57 37.486 -62.6  2.63  0.027
## 25 35.765 -62.0  3.22  0.020
## 21 35.739 -61.9  3.27  0.019
## 18 35.728 -61.9  3.29  0.019
## 19 35.715 -61.9  3.32  0.019
## 4  35.713 -61.9  3.32  0.019
## 10 35.712 -61.9  3.32  0.019
## 13 35.709 -61.9  3.33  0.019
## 6  35.698 -61.9  3.35  0.019
## 11 35.688 -61.8  3.37  0.018
## 43 36.810 -61.2  3.99  0.014
## 7  35.200 -60.9  4.35  0.011
## 51 36.614 -60.8  4.38  0.011
## 45 36.278 -60.2  5.05  0.008
## 42 36.267 -60.1  5.07  0.008
## 53 36.179 -60.0  5.25  0.007
## 38 36.178 -60.0  5.25  0.007
## 50 36.155 -59.9  5.30  0.007
## 36 36.149 -59.9  5.31  0.007
## 39 36.137 -59.9  5.33  0.007
## 58 37.615 -59.7  5.48  0.006
## 27 36.002 -59.6  5.60  0.006
## 61 37.542 -59.6  5.62  0.006
## 59 37.526 -59.6  5.66  0.006
## 26 35.817 -59.2  5.97  0.005
## 29 35.793 -59.2  6.02  0.005
## 22 35.743 -59.1  6.12  0.005
## 23 35.739 -59.1  6.13  0.005
## 20 35.728 -59.1  6.15  0.005
## 14 35.720 -59.0  6.17  0.005
## 12 35.717 -59.0  6.17  0.005
## 8  35.716 -59.0  6.18  0.005
## 15 35.710 -59.0  6.19  0.005
## 44 36.954 -58.4  6.80  0.003
## 47 36.870 -58.2  6.97  0.003
## 52 36.688 -57.9  7.33  0.003
## 55 36.678 -57.9  7.35  0.003
## 46 36.279 -57.1  8.15  0.002
## 40 36.277 -57.1  8.15  0.002
## 54 36.192 -56.9  8.32  0.002
## 31 36.015 -56.5  8.68  0.001
## 28 36.012 -56.5  8.68  0.001
## 62 37.627 -56.4  8.82  0.001
## 60 37.624 -56.4  8.83  0.001
## 63 37.571 -56.3  8.94  0.001
## 30 35.825 -56.1  9.06  0.001
## 24 35.743 -56.0  9.22  0.001
## 16 35.726 -56.0  9.26  0.001
## 48 36.969 -55.1 10.14  0.001
## 56 36.717 -54.6 10.64  0.000
## 32 36.019 -53.2 12.04  0.000
## 64 37.636 -52.7 12.48  0.000
## Models ranked by AICc(x)

Here we see there are 64 possible models based on additive combinations of the variables.

Let’s dredge models for a delta <2/ within two AICc units.

subset(dredge_Body_Condition, delta <2)
## Global model call: lm(formula = Body_Condition_Factor ~ Sat_Gill + DominantWavelength_Gill_nm + 
##     Whiteness_Gill + Chroma_Gill + Tint_Gill + Red_Coloration_Area, 
##     data = Thesis_Data)
## ---
## Model selection table 
##      (Int)  Chr_Gll DmW_Gll_nm Sat_Gll    Tnt_Gll   Wht_Gll df logLik  AICc
## 33  0.7264                                        -0.000412  3 36.048 -65.2
## 1   0.7762                                                   2 34.730 -65.0
## 17  0.7404                             -0.0003712            3 35.714 -64.5
## 2   0.7413 0.001475                                          3 35.698 -64.5
## 9   0.7392                     0.03805                       3 35.688 -64.5
## 3  -0.9560            0.002911                               3 35.146 -63.4
##    delta weight
## 33  0.00  0.225
## 1   0.18  0.206
## 17  0.67  0.161
## 2   0.70  0.159
## 9   0.72  0.157
## 3   1.80  0.091
## Models ranked by AICc(x)

The dredged models for <2 delta AICc, resulting in Model 33, 1, 17, 2, 9, and 3. Notice the top model 33 includes the intercept (slope) and Whiteness_Gill. Model 1 includes the intercept and no other predictor variables. Model 17 includes Tint_Gill. Model 2 includes Chroma_Gill. Model 9 includes Sat_Gill. Model 3 includes Dominant_Wavelength_Gill.

Now let’s rank variables support in the models above. This calculates variable importance weights.

importance(dredge_Body_Condition)
##                      Whiteness_Gill Sat_Gill Tint_Gill Chroma_Gill
## Sum of weights:      0.39           0.30     0.29      0.26       
## N containing models:   32             32       32        32       
##                      DominantWavelength_Gill_nm Red_Coloration_Area
## Sum of weights:      0.23                       0.21               
## N containing models:   32                         32

The results of importance show that each of the six variables show up 32 times in the models with Whiteness_Gill carrying most of the variable weight in the models.

Based on the effects of variables in the models using subset(dredge_Body_Condition, delta <2), I ran the following lm:

model33 <- lm(Body_Condition_Factor~Whiteness_Gill, data = Thesis_Data)
model17 <- lm(Body_Condition_Factor~Tint_Gill, data = Thesis_Data)
model2 <- lm(Body_Condition_Factor~Chroma_Gill, data = Thesis_Data)
model9 <- lm(Body_Condition_Factor~Sat_Gill, data = Thesis_Data)
model3 <- lm(Body_Condition_Factor~DominantWavelength_Gill_nm, data = Thesis_Data)
anova(model33)
## Analysis of Variance Table
## 
## Response: Body_Condition_Factor
##                Df   Sum Sq   Mean Sq F value Pr(>F)
## Whiteness_Gill  1 0.015745 0.0157451  2.5745 0.1194
## Residuals      29 0.177360 0.0061159
summary(model33)
## 
## Call:
## lm(formula = Body_Condition_Factor ~ Whiteness_Gill, data = Thesis_Data)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.120379 -0.052681 -0.008274  0.045050  0.187392 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.7263532  0.0341066  21.297   <2e-16 ***
## Whiteness_Gill -0.0004120  0.0002568  -1.605    0.119    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.0782 on 29 degrees of freedom
## Multiple R-squared:  0.08154,    Adjusted R-squared:  0.04987 
## F-statistic: 2.574 on 1 and 29 DF,  p-value: 0.1194
out.put2<-model.sel(model33, model17, model2, model9, model3)
out.put2
## Model selection table 
##           (Int)   Wht_Gll    Tnt_Gll  Chr_Gll Sat_Gll DmW_Gll_nm
## model33  0.7264 -0.000412                                       
## model17  0.7404           -0.0003712                            
## model2   0.7413                      0.001475                   
## model9   0.7392                               0.03805           
## model3  -0.9560                                         0.002911
##                     family df logLik  AICc delta weight
## model33 gaussian(identity)  3 36.048 -65.2  0.00  0.284
## model17 gaussian(identity)  3 35.714 -64.5  0.67  0.203
## model2  gaussian(identity)  3 35.698 -64.5  0.70  0.200
## model9  gaussian(identity)  3 35.688 -64.5  0.72  0.198
## model3  gaussian(identity)  3 35.146 -63.4  1.80  0.115
## Models ranked by AICc(x)

Low delta <2 for all models with close weights.

importance(out.put2)
##                      Whiteness_Gill Tint_Gill Chroma_Gill Sat_Gill
## Sum of weights:      0.284          0.203     0.200       0.198   
## N containing models:     1              1         1           1   
##                      DominantWavelength_Gill_nm
## Sum of weights:      0.115                     
## N containing models:     1

Here are the importance values of each of the variables used in the model selection with Whiteness_Gill as variable with most weight.

Now to average the models using the lm from out.put2

model.avg(out.put2, revised.var = TRUE)
## 
## Call:
## model.avg(object = out.put2, revised.var = TRUE)
## 
## Component models: 
## '1' '2' '3' '4' '5'
## 
## Coefficients: 
##        (Intercept) Whiteness_Gill     Tint_Gill Chroma_Gill    Sat_Gill
## full      0.541006  -0.0001169260 -7.539821e-05 0.000295049 0.007532723
## subset    0.541006  -0.0004120381 -3.711991e-04 0.001475486 0.038049987
##        DominantWavelength_Gill_nm
## full                  0.000335220
## subset                0.002910704
summary(model.avg(out.put2))
## 
## Call:
## model.avg(object = out.put2)
## 
## Component model call: 
## lm(formula = <5 unique values>, data = Thesis_Data)
## 
## Component models: 
##   df logLik   AICc delta weight
## 1  3  36.05 -65.21  0.00   0.28
## 2  3  35.71 -64.54  0.67   0.20
## 3  3  35.70 -64.51  0.70   0.20
## 4  3  35.69 -64.49  0.72   0.20
## 5  3  35.15 -63.40  1.80   0.12
## 
## Term codes: 
##             Whiteness_Gill                  Tint_Gill 
##                          1                          2 
##                Chroma_Gill                   Sat_Gill 
##                          3                          4 
## DominantWavelength_Gill_nm 
##                          5 
## 
## Model-averaged coefficients:  
## (full average) 
##                              Estimate Std. Error Adjusted SE z value Pr(>|z|)
## (Intercept)                 0.5410060  0.8544425   0.8769507   0.617    0.537
## Whiteness_Gill             -0.0001169  0.0002307   0.0002343   0.499    0.618
## Tint_Gill                  -0.0000754  0.0001924   0.0001958   0.385    0.700
## Chroma_Gill                 0.0002950  0.0007623   0.0007758   0.380    0.704
## Sat_Gill                    0.0075327  0.0196177   0.0199658   0.377    0.706
## DominantWavelength_Gill_nm  0.0003352  0.0014487   0.0014861   0.226    0.822
##  
## (conditional average) 
##                              Estimate Std. Error Adjusted SE z value Pr(>|z|)
## (Intercept)                 0.5410060  0.8544425   0.8769506   0.617    0.537
## Whiteness_Gill             -0.0004120  0.0002568   0.0002680   1.538    0.124
## Tint_Gill                  -0.0003712  0.0002693   0.0002810   1.321    0.186
## Chroma_Gill                 0.0014755  0.0010791   0.0011261   1.310    0.190
## Sat_Gill                    0.0380500  0.0279791   0.0291963   1.303    0.192
## DominantWavelength_Gill_nm  0.0029107  0.0032750   0.0034175   0.852    0.394

Term Codes for the variables: 1 = Whiteness_Gill, 2 = Tint_Gill, 3 = Chroma_Gill, 4 = Sat_Gill, 5 = DominantWavelength_Gill_nm. There are high P-values. You can see from both the full and conditional average that Whiteness_Gill and Tint_Gill have a negative effect on Body Condition Factor, while DominantWavelength_Gill_nm, Sat_Gill, and Chroma_Gill,have a positive effect on Body Condition Factor based on estimate column.

w33 <- ggplot(Thesis_Data, aes(Whiteness_Gill, Body_Condition_Factor)) + 
  geom_point() +
  geom_smooth(method="lm")

w17 <- ggplot(Thesis_Data, aes(Tint_Gill, Body_Condition_Factor)) + 
  geom_point() +
  geom_smooth(method="lm")

w2 <- ggplot(Thesis_Data, aes(Chroma_Gill, Body_Condition_Factor)) + 
  geom_point() +
  geom_smooth(method="lm")

w9 <- ggplot(Thesis_Data, aes(Sat_Gill, Body_Condition_Factor)) + 
  geom_point() +
  geom_smooth(method="lm")

w3 <- ggplot(Thesis_Data, aes(DominantWavelength_Gill_nm, Body_Condition_Factor)) + 
  geom_point() +
  geom_smooth(method="lm")


 w33 / w17 / w2 / w9 / w3  
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

# Binary Data: Site, Season, Sex

leaflet() %>%
  setView(-86.854396, 36.26361 , zoom = 16) %>% #lat-long of the place of interest
  addTiles() %>%
  addProviderTiles('Esri.WorldImagery') %>%
  addMarkers(-86.854396, 36.26361 , popup = "Dry Fork, Whites Creek System")
leaflet() %>%
  setView(-87.287965, 36.499277 , zoom = 16) %>% #lat-long of the place of interest
  addTiles() %>%
  addProviderTiles('Esri.WorldImagery') %>%
  addMarkers(-87.287965, 36.499277 , popup = "Rotary Park, Wall Branch Creek System")
Red_Coloration_Area <- ggplot(Thesis_Data1,aes(Red_Coloration_Area,Season_Collected)) +
  geom_point() +
  geom_smooth() +
  xlab ("Red Coloration Area") +
  ylab ("Season Collected") +
  labs(title="Breeding Vs. Non-breeding Season")

Saturation <- ggplot(Thesis_Data1,aes(Sat_Gill,Season_Collected)) +
  geom_point() +
  geom_smooth() +
  xlab ("Saturation") +
  ylab ("Season Collected") +
  labs(title="Breeding Vs. Non-breeding Season")

Whiteness <- ggplot(Thesis_Data1,aes(Whiteness_Gill,Season_Collected)) +
  geom_point() +
  geom_smooth() +
  xlab ("Whiteness") +
  ylab ("Season Collected") +
  labs(title="Breeding Vs. Non-breeding Season")

Chroma <- ggplot(Thesis_Data1,aes(Chroma_Gill,Season_Collected)) +
  geom_point() +
  geom_smooth() +
  xlab ("Chroma") +
  ylab ("Season Collected") +
  labs(title="Breeding Vs. Non-breeding Season")

Tint <- ggplot(Thesis_Data1,aes(Tint_Gill,Season_Collected)) +
  geom_point() +
  geom_smooth() +
  xlab ("Tint") +
  ylab ("Season Collected") +
  labs(title="Breeding Vs. Non-breeding Season")

Red_Coloration_Area / Saturation / Whiteness / Chroma / Tint
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

GSI_Value <- ggplot(Thesis_Data1,aes(GSI_Value,Season_Collected)) +
  geom_point() +
  geom_smooth() +
  xlab ("GSI") +
  ylab ("Season Collected") +
  labs(title="Breeding Vs. Non-breeding Season")

Body_Condition_Factor <- ggplot(Thesis_Data1,aes(Body_Condition_Factor,Season_Collected)) +
  geom_point() +
  geom_smooth() +
  xlab ("Body Condition Factor") +
  ylab ("Season Collected") +
  labs(title="Breeding Vs. Non-breeding Season")

GSI_Value / Body_Condition_Factor
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Red_Coloration_Area1 <- ggplot(Thesis_Data1,aes(Red_Coloration_Area,Site_Location)) +
  geom_point() +
  geom_smooth() +
  xlab ("Red Coloration Area") +
  ylab ("Site") +
  labs(title="Rotary Park vs. Dry Fork")

Saturation1 <- ggplot(Thesis_Data1,aes(Sat_Gill,Site_Location)) +
  geom_point() +
  geom_smooth() +
  xlab ("Saturation") +
  ylab ("Site") +
  labs(title="Rotary Park vs. Dry Fork")


Whiteness1 <- ggplot(Thesis_Data1,aes(Whiteness_Gill,Site_Location)) +
  geom_point() +
  geom_smooth() +
  xlab ("Whiteness") +
  ylab ("Site") +
  labs(title="Rotary Park vs. Dry Fork")

Chroma1 <- ggplot(Thesis_Data1,aes(Chroma_Gill,Site_Location)) +
  geom_point() +
  geom_smooth() +
  xlab ("Chroma") +
  ylab ("Site") +
  labs(title="Rotary Park vs. Dry Fork")

Tint1 <- ggplot(Thesis_Data1,aes(Tint_Gill,Site_Location)) +
  geom_point() +
  geom_smooth() +
  xlab ("Tint") +
 ylab ("Site") +
  labs(title="Rotary Park vs. Dry Fork")


Red_Coloration_Area1 / Saturation1 / Whiteness1 / Chroma1 / Tint1
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Note there 15 individuals collected from Dry Fork, Wall Branch Creek System and 16 from Rotary Park, Whites Creek System

GSI_Value1 <- ggplot(Thesis_Data1,aes(GSI_Value,Site_Location)) +
  geom_point() +
  geom_smooth() +
  xlab ("GSI") +
  ylab ("Site") +
  labs(title="Rotary Park vs. Dry Fork")

Body_Condition_Factor1 <- ggplot(Thesis_Data1,aes(Body_Condition_Factor,Site_Location)) +
  geom_point() +
  geom_smooth() +
  xlab ("Body Condition Factor") +
  ylab ("Site") +
  labs(title="Rotary Park vs. Dry Fork")

GSI_Value1 / Body_Condition_Factor1
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

GSI_Value_BCF <- ggplot(Thesis_Data1,aes(GSI_Value,Body_Condition_Factor)) +
  geom_point() +
  geom_smooth() +
  xlab ("Gonadosomatic Index (GSI)") +
  ylab ("Body Condition Factor") +
  labs(title="GSI and BCF")

GSI_Value_BCF
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

JUST MAKING SURE MY CALCS ARE CORRECT, WE WOULD DEFINITELY NO DOUBT SEE THIS….DONT INCLUDE IN ANY RESULTS….JUST A MATTER OF MATH CHECKING

GSI_BCF <- lm(Body_Condition_Factor~GSI_Value, data = Thesis_Data)

anova(GSI_BCF)
## Analysis of Variance Table
## 
## Response: Body_Condition_Factor
##           Df   Sum Sq  Mean Sq F value  Pr(>F)  
## GSI_Value  1 0.039168 0.039168  7.3788 0.01101 *
## Residuals 29 0.153937 0.005308                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Binary Data Below….

RUN SOME GLMs WITH RESULTS ABOVE

Site_GSI <- read.csv("Site_GSI.csv")
fit.1 <- glm(Site_Location~GSI_Value, data=Site_GSI, binomial(link="logit"))

anova(fit.1)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Site_Location
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev
## NULL                         30     42.943
## GSI_Value  1   1.6818        29     41.261
summary(fit.1)
## 
## Call:
## glm(formula = Site_Location ~ GSI_Value, family = binomial(link = "logit"), 
##     data = Site_GSI)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3260  -1.2254  -0.5553   1.1255   1.3946  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  0.49833    0.57816   0.862    0.389
## GSI_Value   -0.11082    0.09331  -1.188    0.235
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 42.943  on 30  degrees of freedom
## Residual deviance: 41.261  on 29  degrees of freedom
## AIC: 45.261
## 
## Number of Fisher Scoring iterations: 4
autoplot(fit.1)

library(arm)

library(arm)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:patchwork':
## 
##     area
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## arm (Version 1.11-2, built: 2020-7-27)
## Working directory is C:/Users/ridew/OneDrive/Documents/Advanced Analytics/Graduate_Thesis_Analysis/Graduate_Thesis
## 
## Attaching package: 'arm'
## The following object is masked from 'package:performance':
## 
##     display
x <- predict(fit.1)
y <- resid(fit.1)
binnedplot(x, y)

#The results of the binnedplot, show that binned residuals of this data do not present good binary data. #The binned residual plot is used to view points that fall into +/- 2 standard errors; ~95% of the binned residuals.

coef(fit.1)
## (Intercept)   GSI_Value 
##   0.4983295  -0.1108241
confint(fit.1)
## Waiting for profiling to be done...
##                  2.5 %     97.5 %
## (Intercept) -0.6070965 1.70345833
## GSI_Value   -0.3361693 0.05272776
ggplot(Site_GSI, aes(GSI_Value,Site_Location)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Gonadosomatic Index (GSI)") +
  ylab ("Site") +
  labs (title="Raw Fit: 0=Rotary Park, 1=Dry Fork")
## `geom_smooth()` using formula 'y ~ x'

invlogit <- function(x) {1 / ( 1+exp(-x) ) } 
invlogit(coef(fit.1))
## (Intercept)   GSI_Value 
##   0.6220667   0.4723223

INTERPRET

Site_BCF <- read.csv("Site_BCF.csv")
fit.2 <- glm(Site_Location~Body_Condition_Factor, data=Site_BCF, binomial(link="logit"))

anova(fit.2)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Site_Location
## 
## Terms added sequentially (first to last)
## 
## 
##                       Df Deviance Resid. Df Resid. Dev
## NULL                                     30     42.943
## Body_Condition_Factor  1   2.4488        29     40.494
summary(fit.2)
## 
## Call:
## glm(formula = Site_Location ~ Body_Condition_Factor, family = binomial(link = "logit"), 
##     data = Site_BCF)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6788  -1.0549  -0.7832   1.1923   1.4587  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)
## (Intercept)             -5.844      3.877  -1.508    0.132
## Body_Condition_Factor    7.442      4.968   1.498    0.134
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 42.943  on 30  degrees of freedom
## Residual deviance: 40.494  on 29  degrees of freedom
## AIC: 44.494
## 
## Number of Fisher Scoring iterations: 4
autoplot(fit.2)

x <- predict(fit.2)
y <- resid(fit.2)
binnedplot(x, y)

coef(fit.2)
##           (Intercept) Body_Condition_Factor 
##             -5.844432              7.442381
confint(fit.2)
## Waiting for profiling to be done...
##                            2.5 %    97.5 %
## (Intercept)           -14.169776  1.383205
## Body_Condition_Factor  -1.819804 18.114250
ggplot(Site_BCF, aes(Body_Condition_Factor,Site_Location)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Body Condition Factor") +
  ylab ("Site") +
  labs (title="Raw Fit: 0=Rotary Park, 1=Dry Fork")
## `geom_smooth()` using formula 'y ~ x'

***********************8

Season_GSI <- read.csv("Season_GSI.csv")
fit.3 <- glm(Season_Collected~GSI_Value, data=Season_GSI, binomial(link="logit"))
anova(fit.3)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Season_Collected
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev
## NULL                         30     42.943
## GSI_Value  1   11.121        29     31.822

summary(fit.3)
## 
## Call:
## glm(formula = Season_Collected ~ GSI_Value, family = binomial(link = "logit"), 
##     data = Season_GSI)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.46646  -0.93314   0.01081   0.85743   1.68542  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  -2.3364     1.1061  -2.112   0.0347 *
## GSI_Value     0.5848     0.2915   2.006   0.0449 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 42.943  on 30  degrees of freedom
## Residual deviance: 31.822  on 29  degrees of freedom
## AIC: 35.822
## 
## Number of Fisher Scoring iterations: 6
autoplot(fit.3)

x <- predict(fit.3)
y <- resid(fit.3)
binnedplot(x, y)

coef(fit.3)
## (Intercept)   GSI_Value 
##  -2.3363968   0.5847971
confint(fit.3)
## Waiting for profiling to be done...
##                  2.5 %     97.5 %
## (Intercept) -4.9189269 -0.5231682
## GSI_Value    0.1600218  1.2964211
ggplot(Season_GSI, aes(GSI_Value,Season_Collected)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Gonadosomatic Index (GSI)") +
  ylab ("Season Collected") +
  labs (title="Raw Fit: 0=Non-breeding Season, 1=Breeding Season")
## `geom_smooth()` using formula 'y ~ x'

invlogit <- function(x) {1 / ( 1+exp(-x) ) } 
invlogit(coef(fit.3))
## (Intercept)   GSI_Value 
##  0.08815312  0.64217048

INTERPRET


Season_BCF <- read.csv("Season_BCF.csv")
fit.4 <- glm(Season_Collected~Body_Condition_Factor, data=Season_BCF, binomial(link="logit"))

anova(fit.4)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Season_Collected
## 
## Terms added sequentially (first to last)
## 
## 
##                       Df Deviance Resid. Df Resid. Dev
## NULL                                     30     42.943
## Body_Condition_Factor  1   5.1768        29     37.766
summary(fit.4)
## 
## Call:
## glm(formula = Season_Collected ~ Body_Condition_Factor, family = binomial(link = "logit"), 
##     data = Season_BCF)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.9688  -1.0623   0.5223   0.9401   1.7943  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)  
## (Intercept)             -8.795      4.293  -2.048   0.0405 *
## Body_Condition_Factor   11.439      5.533   2.067   0.0387 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 42.943  on 30  degrees of freedom
## Residual deviance: 37.766  on 29  degrees of freedom
## AIC: 41.766
## 
## Number of Fisher Scoring iterations: 4
autoplot(fit.4)

x <- predict(fit.4)
y <- resid(fit.4)
binnedplot(x, y)

coef(fit.4)
##           (Intercept) Body_Condition_Factor 
##             -8.794705             11.438501
confint(fit.4)
## Waiting for profiling to be done...
##                            2.5 %    97.5 %
## (Intercept)           -18.319710 -1.058042
## Body_Condition_Factor   1.490894 23.737218
ggplot(Season_BCF, aes(Body_Condition_Factor,Season_Collected)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Body Condition Factor (BCF)") +
  ylab ("Season Collected") +
  labs (title="Raw Fit: 0=Non-breeding Season, 1=Breeding Season")
## `geom_smooth()` using formula 'y ~ x'

invlogit <- function(x) {1 / ( 1+exp(-x) ) } 
invlogit(coef(fit.4))
##           (Intercept) Body_Condition_Factor 
##          0.0001515103          0.9999892275

INTERPRET

#The R output for the boxcox() function plots the maximum likelihood surface (the curve) together with a maximum likelihood-based 95% CI (Hector, 2015)

#Helpful GLM component info from Hector, 2015 Ch8

#GLMs have three components: # 1.) a linear predictor- is what comes after the tilde (~) in our linear model formula # 2.) a variance function - models the variation in the data;make use of a much wider rangefamily of distributions including the poisson, the binomial, and the gamma. #3.) a link function- plays a role equivalent to the transformation in normal least squares models. However, rather than transforming the data we transform the predictions made by the linear predictor. Commonly used link functions include the log, square root, and logistic.

Season_BCF <- read.csv("Season_BCF.csv")
sex <- read.csv("sex.csv")
ggplot(sex, aes(Body_Condition_Factor, Sex)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Body Condition Factor") +
  ylab ("Sex") +
  labs (title="Raw Fit: 0=Female, 1=Male")
## `geom_smooth()` using formula 'y ~ x'

fit.5 <- glm(Sex~Body_Condition_Factor, data=sex, binomial(link="logit"))
anova(fit.5)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Sex
## 
## Terms added sequentially (first to last)
## 
## 
##                       Df Deviance Resid. Df Resid. Dev
## NULL                                     26     34.372
## Body_Condition_Factor  1   2.1842        25     32.188
summary(fit.5)
## 
## Call:
## glm(formula = Sex ~ Body_Condition_Factor, family = binomial(link = "logit"), 
##     data = sex)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2694  -0.9239  -0.6616   1.1494   1.7603  
## 
## Coefficients:
##                       Estimate Std. Error z value Pr(>|z|)
## (Intercept)              5.508      4.394   1.254     0.21
## Body_Condition_Factor   -8.011      5.705  -1.404     0.16
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 34.372  on 26  degrees of freedom
## Residual deviance: 32.188  on 25  degrees of freedom
## AIC: 36.188
## 
## Number of Fisher Scoring iterations: 4
ggplot(sex, aes(GSI_Value, Sex)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("GSI") +
  ylab ("Sex") +
  labs (title="Raw Fit: 0=Female, 1=Male")
## `geom_smooth()` using formula 'y ~ x'

fit.6 <- glm(Sex~GSI_Value, data=sex, binomial(link="logit"))
anova(fit.6)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Sex
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev
## NULL                         26     34.372
## GSI_Value  1   13.743        25     20.629
summary(fit.6)
## 
## Call:
## glm(formula = Sex ~ GSI_Value, family = binomial(link = "logit"), 
##     data = sex)
## 
## Deviance Residuals: 
##      Min        1Q    Median        3Q       Max  
## -1.64909  -0.62058  -0.02353   0.62508   1.55703  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)   4.2567     2.1028   2.024   0.0429 *
## GSI_Value    -1.3854     0.6478  -2.139   0.0325 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 34.372  on 26  degrees of freedom
## Residual deviance: 20.629  on 25  degrees of freedom
## AIC: 24.629
## 
## Number of Fisher Scoring iterations: 7
autoplot(fit.6)

ggplot(sex, aes(Whiteness_Gill,Sex)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Whiteness") +
  ylab ("Sex") +
  labs (title="Raw Fit: 0=Female, 1=Male")
## `geom_smooth()` using formula 'y ~ x'

fit.7 <- glm(Sex~Whiteness_Gill, data=sex, binomial(link="logit"))
anova(fit.7)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Sex
## 
## Terms added sequentially (first to last)
## 
## 
##                Df Deviance Resid. Df Resid. Dev
## NULL                              26     34.372
## Whiteness_Gill  1   1.1937        25     33.178
summary(fit.7)
## 
## Call:
## glm(formula = Sex ~ Whiteness_Gill, family = binomial(link = "logit"), 
##     data = sex)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.2398  -0.8955  -0.7389   1.2831   1.6799  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)
## (Intercept)    -1.651433   1.009484  -1.636    0.102
## Whiteness_Gill -0.007844   0.007331  -1.070    0.285
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 34.372  on 26  degrees of freedom
## Residual deviance: 33.178  on 25  degrees of freedom
## AIC: 37.178
## 
## Number of Fisher Scoring iterations: 4
ggplot(sex, aes(Sat_Gill,Sex)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Saturation") +
  ylab ("Sex") +
  labs (title="Raw Fit: 0=Female, 1=Male")
## `geom_smooth()` using formula 'y ~ x'

fit.8 <- glm(Sex~Sat_Gill, data=sex, binomial(link="logit"))
anova(fit.8)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Sex
## 
## Terms added sequentially (first to last)
## 
## 
##          Df Deviance Resid. Df Resid. Dev
## NULL                        26     34.372
## Sat_Gill  1  0.93145        25     33.440
summary(fit.8)
## 
## Call:
## glm(formula = Sex ~ Sat_Gill, family = binomial(link = "logit"), 
##     data = sex)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1729  -0.8980  -0.7807   1.3735   1.6428  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -1.4172     0.8827  -1.605    0.108
## Sat_Gill      0.7451     0.7843   0.950    0.342
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 34.372  on 26  degrees of freedom
## Residual deviance: 33.440  on 25  degrees of freedom
## AIC: 37.44
## 
## Number of Fisher Scoring iterations: 4
ggplot(sex, aes(Red_Coloration_Area,Sex)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Red Coloration Area") +
  ylab ("Sex") +
  labs (title="Raw Fit: 0=Female, 1=Male")
## `geom_smooth()` using formula 'y ~ x'

fit.9 <- glm(Sex~Red_Coloration_Area, data=sex, binomial(link="logit"))
anova(fit.9)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Sex
## 
## Terms added sequentially (first to last)
## 
## 
##                     Df Deviance Resid. Df Resid. Dev
## NULL                                   26     34.372
## Red_Coloration_Area  1  0.40118        25     33.971
summary(fit.9)
## 
## Call:
## glm(formula = Sex ~ Red_Coloration_Area, family = binomial(link = "logit"), 
##     data = sex)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0867  -0.9077  -0.8116   1.3558   1.6275  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)
## (Intercept)         -1.09385    0.76958  -1.421    0.155
## Red_Coloration_Area  0.03143    0.04975   0.632    0.527
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 34.372  on 26  degrees of freedom
## Residual deviance: 33.971  on 25  degrees of freedom
## AIC: 37.971
## 
## Number of Fisher Scoring iterations: 4
ggplot(sex, aes(Chroma_Gill,Sex)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Chroma") +
  ylab ("Sex") +
  labs (title="Raw Fit: 0=Female, 1=Male")
## `geom_smooth()` using formula 'y ~ x'

fit.10 <- glm(Sex~Chroma_Gill, data=sex, binomial(link="logit"))
anova(fit.10)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Sex
## 
## Terms added sequentially (first to last)
## 
## 
##             Df Deviance Resid. Df Resid. Dev
## NULL                           26     34.372
## Chroma_Gill  1  0.77017        25     33.602
summary(fit.10)
## 
## Call:
## glm(formula = Sex ~ Chroma_Gill, family = binomial(link = "logit"), 
##     data = sex)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1750  -0.8845  -0.7934   1.3836   1.6103  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.30702    0.83211  -1.571    0.116
## Chroma_Gill  0.02548    0.02924   0.871    0.384
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 34.372  on 26  degrees of freedom
## Residual deviance: 33.602  on 25  degrees of freedom
## AIC: 37.602
## 
## Number of Fisher Scoring iterations: 4
ggplot(sex, aes(Tint_Gill,Sex)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Tint") +
  ylab ("Sex") +
  labs (title="Raw Fit: 0=Female, 1=Male")
## `geom_smooth()` using formula 'y ~ x'

fit.11 <- glm(Sex~Tint_Gill, data=sex, binomial(link="logit"))
anova(fit.11)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Sex
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev
## NULL                         26     34.372
## Tint_Gill  1  0.80716        25     33.565
summary(fit.11)
## 
## Call:
## glm(formula = Sex ~ Tint_Gill, family = binomial(link = "logit"), 
##     data = sex)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1548  -0.8975  -0.7839   1.3845   1.6635  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.336593   0.849369  -1.574    0.116
## Tint_Gill   -0.006685   0.007530  -0.888    0.375
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 34.372  on 26  degrees of freedom
## Residual deviance: 33.565  on 25  degrees of freedom
## AIC: 37.565
## 
## Number of Fisher Scoring iterations: 4
fit.12 <- glm(Sex~DominantWavelength_Gill_nm, data=sex, binomial(link="logit"))
anova(fit.12)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Sex
## 
## Terms added sequentially (first to last)
## 
## 
##                            Df Deviance Resid. Df Resid. Dev
## NULL                                          26     34.372
## DominantWavelength_Gill_nm  1  0.01331        25     34.358
summary(fit.12)
## 
## Call:
## glm(formula = Sex ~ DominantWavelength_Gill_nm, family = binomial(link = "logit"), 
##     data = sex)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9232  -0.9097  -0.8894   1.4700   1.5377  
## 
## Coefficients:
##                            Estimate Std. Error z value Pr(>|z|)
## (Intercept)                -7.12970   55.96033  -0.127    0.899
## DominantWavelength_Gill_nm  0.01082    0.09408   0.115    0.908
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 34.372  on 26  degrees of freedom
## Residual deviance: 34.358  on 25  degrees of freedom
## AIC: 38.358
## 
## Number of Fisher Scoring iterations: 4

NOTE 9 MALES VS. 18 FEMALES SEXED

Binary Data: Season

season <- read.csv("season.csv")
season_whiteness <- glm(Season_Collected~Whiteness_Gill, data=season, binomial(link="logit"))

anova(season_whiteness)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Season_Collected
## 
## Terms added sequentially (first to last)
## 
## 
##                Df Deviance Resid. Df Resid. Dev
## NULL                              30     42.943
## Whiteness_Gill  1    5.472        29     37.471
summary(season_whiteness)
## 
## Call:
## glm(formula = Season_Collected ~ Whiteness_Gill, family = binomial(link = "logit"), 
##     data = season)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5771  -1.0992   0.3923   1.0637   1.6442  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)  
## (Intercept)    -2.057820   1.093577  -1.882   0.0599 .
## Whiteness_Gill -0.017829   0.008743  -2.039   0.0414 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 42.943  on 30  degrees of freedom
## Residual deviance: 37.471  on 29  degrees of freedom
## AIC: 41.471
## 
## Number of Fisher Scoring iterations: 3
autoplot(season_whiteness)

x <- predict(season_whiteness)
y <- resid(season_whiteness)
binnedplot(x, y)

coef(season_whiteness)
##    (Intercept) Whiteness_Gill 
##    -2.05782012    -0.01782885
confint(season_whiteness)
## Waiting for profiling to be done...
##                      2.5 %       97.5 %
## (Intercept)    -4.51622094 -0.109849268
## Whiteness_Gill -0.03791468 -0.002640105
ggplot(season, aes(Whiteness_Gill, Season_Collected)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Whiteness") +
  ylab ("Season Collected") +
  labs (title="Raw Fit: 0=Non-breeding, 1=Breeding")
## `geom_smooth()` using formula 'y ~ x'

season_sat <- glm(Season_Collected~Sat_Gill, data=season, binomial(link="logit"))

anova(season_sat)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Season_Collected
## 
## Terms added sequentially (first to last)
## 
## 
##          Df Deviance Resid. Df Resid. Dev
## NULL                        30     42.943
## Sat_Gill  1   3.8451        29     39.098
summary(season_sat)
## 
## Call:
## glm(formula = Season_Collected ~ Sat_Gill, family = binomial(link = "logit"), 
##     data = season)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6740  -1.1352   0.3925   1.0697   1.5099  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept)  -1.4410     0.9208  -1.565    0.118  
## Sat_Gill      1.5858     0.9089   1.745    0.081 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 42.943  on 30  degrees of freedom
## Residual deviance: 39.098  on 29  degrees of freedom
## AIC: 43.098
## 
## Number of Fisher Scoring iterations: 3
autoplot(season_sat)

x <- predict(season_sat)
y <- resid(season_sat)
binnedplot(x, y)

coef(season_sat)
## (Intercept)    Sat_Gill 
##   -1.441022    1.585776
confint(season_sat)
## Waiting for profiling to be done...
##                     2.5 %    97.5 %
## (Intercept) -3.4742170007 0.2307588
## Sat_Gill     0.0008186747 3.6562478
ggplot(season, aes(Sat_Gill, Season_Collected)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Saturation") +
  ylab ("Season Collected") +
  labs (title="Raw Fit: 0=Non-breeding, 1=Breeding")
## `geom_smooth()` using formula 'y ~ x'

season_chroma <- glm(Season_Collected~Chroma_Gill, data=season, binomial(link="logit"))

anova(season_chroma)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Season_Collected
## 
## Terms added sequentially (first to last)
## 
## 
##             Df Deviance Resid. Df Resid. Dev
## NULL                           30     42.943
## Chroma_Gill  1   9.2716        29     33.671
summary(season_chroma)
## 
## Call:
## glm(formula = Season_Collected ~ Chroma_Gill, family = binomial(link = "logit"), 
##     data = season)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.7268  -0.9334   0.1642   0.8161   1.7843  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -2.45349    1.08376  -2.264   0.0236 *
## Chroma_Gill  0.11231    0.04692   2.394   0.0167 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 42.943  on 30  degrees of freedom
## Residual deviance: 33.671  on 29  degrees of freedom
## AIC: 37.671
## 
## Number of Fisher Scoring iterations: 5
autoplot(season_chroma)

x <- predict(season_chroma)
y <- resid(season_chroma)
binnedplot(x, y)

coef(season_chroma)
## (Intercept) Chroma_Gill 
##  -2.4534932   0.1123059
confint(season_chroma)
## Waiting for profiling to be done...
##                   2.5 %     97.5 %
## (Intercept) -4.91273947 -0.5739551
## Chroma_Gill  0.03436441  0.2213027
ggplot(season, aes(Chroma_Gill, Season_Collected)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Chroma") +
  ylab ("Season Collected") +
  labs (title="Raw Fit: 0=Non-breeding, 1=Breeding")
## `geom_smooth()` using formula 'y ~ x'

season_tint <- glm(Season_Collected~Tint_Gill, data=season, binomial(link="logit"))

anova(season_tint)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Season_Collected
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev
## NULL                         30     42.943
## Tint_Gill  1   3.3121        29     39.631
summary(season_tint)
## 
## Call:
## glm(formula = Season_Collected ~ Tint_Gill, family = binomial(link = "logit"), 
##     data = season)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6394  -1.1343   0.4461   1.0709   1.5053  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -1.245031   0.860555  -1.447   0.1480  
## Tint_Gill   -0.013858   0.008406  -1.649   0.0992 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 42.943  on 30  degrees of freedom
## Residual deviance: 39.631  on 29  degrees of freedom
## AIC: 43.631
## 
## Number of Fisher Scoring iterations: 4
autoplot(season_tint)

x <- predict(season_tint)
y <- resid(season_tint)
binnedplot(x, y)

coef(season_tint)
## (Intercept)   Tint_Gill 
## -1.24503120 -0.01385841
confint(season_tint)
## Waiting for profiling to be done...
##                   2.5 %       97.5 %
## (Intercept) -3.12656347 0.3354804956
## Tint_Gill   -0.03285285 0.0009869346
ggplot(season, aes(Tint_Gill, Season_Collected)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Tint") +
  ylab ("Season Collected") +
  labs (title="Raw Fit: 0=Non-breeding, 1=Breeding")
## `geom_smooth()` using formula 'y ~ x'

season_area <- glm(Season_Collected~Red_Coloration_Area, data=season, binomial(link="logit"))

anova(season_area)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Season_Collected
## 
## Terms added sequentially (first to last)
## 
## 
##                     Df Deviance Resid. Df Resid. Dev
## NULL                                   30     42.943
## Red_Coloration_Area  1 0.024822        29     42.918
summary(season_area)
## 
## Call:
## glm(formula = Season_Collected ~ Red_Coloration_Area, family = binomial(link = "logit"), 
##     data = season)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.239  -1.202   1.118   1.149   1.190  
## 
## Coefficients:
##                      Estimate Std. Error z value Pr(>|z|)
## (Intercept)          0.157001   0.688645   0.228    0.820
## Red_Coloration_Area -0.007035   0.044666  -0.158    0.875
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 42.943  on 30  degrees of freedom
## Residual deviance: 42.918  on 29  degrees of freedom
## AIC: 46.918
## 
## Number of Fisher Scoring iterations: 3
ggplot(season, aes(Red_Coloration_Area, Season_Collected)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Red Coloration Area") +
  ylab ("Season Collected") +
  labs (title="Raw Fit: 0=Non-breeding, 1=Breeding")
## `geom_smooth()` using formula 'y ~ x'

Lastly Site

site <- read.csv("site.csv")
site_whiteness <- glm(Site_Location~Whiteness_Gill, data=site, binomial(link="logit"))

anova(site_whiteness)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Site_Location
## 
## Terms added sequentially (first to last)
## 
## 
##                Df Deviance Resid. Df Resid. Dev
## NULL                              30     42.943
## Whiteness_Gill  1   2.6766        29     40.266
summary(site_whiteness)
## 
## Call:
## glm(formula = Site_Location ~ Whiteness_Gill, family = binomial(link = "logit"), 
##     data = site)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4765  -1.1027  -0.7121   1.1828   1.5201  
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)
## (Intercept)    -1.451743   0.974793  -1.489    0.136
## Whiteness_Gill -0.011474   0.007483  -1.533    0.125
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 42.943  on 30  degrees of freedom
## Residual deviance: 40.266  on 29  degrees of freedom
## AIC: 44.266
## 
## Number of Fisher Scoring iterations: 4
ggplot(site, aes(Whiteness_Gill, Site_Location)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Whiteness") +
  ylab ("Site") +
  labs (title="Raw Fit:  0=Rotary Park, 1=Dry Fork")
## `geom_smooth()` using formula 'y ~ x'

site_sat <- glm(Site_Location~Sat_Gill, data=site, binomial(link="logit"))

anova(site_sat)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Site_Location
## 
## Terms added sequentially (first to last)
## 
## 
##          Df Deviance Resid. Df Resid. Dev
## NULL                        30     42.943
## Sat_Gill  1   2.8503        29     40.093
summary(site_sat)
## 
## Call:
## glm(formula = Site_Location ~ Sat_Gill, family = binomial(link = "logit"), 
##     data = site)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.390  -1.115  -0.751   1.197   1.537  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept)  -1.3225     0.8821  -1.499    0.134
## Sat_Gill      1.3053     0.8419   1.550    0.121
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 42.943  on 30  degrees of freedom
## Residual deviance: 40.093  on 29  degrees of freedom
## AIC: 44.093
## 
## Number of Fisher Scoring iterations: 4
ggplot(site, aes(Sat_Gill, Site_Location)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Saturation") +
  ylab ("Site") +
  labs (title="Raw Fit:  0=Rotary Park, 1=Dry Fork")
## `geom_smooth()` using formula 'y ~ x'

site_chroma <- glm(Site_Location~Chroma_Gill, data=site, binomial(link="logit"))

anova(site_chroma)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Site_Location
## 
## Terms added sequentially (first to last)
## 
## 
##             Df Deviance Resid. Df Resid. Dev
## NULL                           30     42.943
## Chroma_Gill  1   1.1042        29     41.839
summary(site_chroma)
## 
## Call:
## glm(formula = Site_Location ~ Chroma_Gill, family = binomial(link = "logit"), 
##     data = site)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3353  -1.1234  -0.9076   1.2453   1.3963  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.76086    0.76997  -0.988    0.323
## Chroma_Gill  0.02952    0.02894   1.020    0.308
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 42.943  on 30  degrees of freedom
## Residual deviance: 41.839  on 29  degrees of freedom
## AIC: 45.839
## 
## Number of Fisher Scoring iterations: 4
ggplot(site, aes(Chroma_Gill, Site_Location)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Chroma") +
  ylab ("Site") +
  labs (title="Raw Fit:  0=Rotary Park, 1=Dry Fork")
## `geom_smooth()` using formula 'y ~ x'

site_area <- glm(Site_Location~Red_Coloration_Area, data=site, binomial(link="logit"))

anova(site_area)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Site_Location
## 
## Terms added sequentially (first to last)
## 
## 
##                     Df Deviance Resid. Df Resid. Dev
## NULL                                   30     42.943
## Red_Coloration_Area  1   3.0095        29     39.933
summary(site_area)
## 
## Call:
## glm(formula = Site_Location ~ Red_Coloration_Area, family = binomial(link = "logit"), 
##     data = site)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.4628  -1.0428  -0.8171   1.0143   1.6107  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)  
## (Intercept)         -1.12780    0.74185  -1.520   0.1284  
## Red_Coloration_Area  0.08076    0.04856   1.663   0.0963 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 42.943  on 30  degrees of freedom
## Residual deviance: 39.933  on 29  degrees of freedom
## AIC: 43.933
## 
## Number of Fisher Scoring iterations: 4
autoplot(site_area)

x <- predict(site_area)
y <- resid(site_area)
binnedplot(x, y)

coef(site_area)
##         (Intercept) Red_Coloration_Area 
##         -1.12779696          0.08075857
confint(site_area)
## Waiting for profiling to be done...
##                           2.5 %    97.5 %
## (Intercept)         -2.70436303 0.2659941
## Red_Coloration_Area -0.01017251 0.1839036
ggplot(site, aes(Red_Coloration_Area, Site_Location)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Red Coloration Area") +
  ylab ("Site") +
  labs (title="Raw Fit:  0=Rotary Park, 1=Dry Fork")
## `geom_smooth()` using formula 'y ~ x'

site_tint <- glm(Site_Location~Tint_Gill, data=site, binomial(link="logit"))

anova(site_tint)
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: Site_Location
## 
## Terms added sequentially (first to last)
## 
## 
##           Df Deviance Resid. Df Resid. Dev
## NULL                         30     42.943
## Tint_Gill  1   2.8449        29     40.098
summary(site_tint)
## 
## Call:
## glm(formula = Site_Location ~ Tint_Gill, family = binomial(link = "logit"), 
##     data = site)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.3847  -1.1112  -0.7514   1.1894   1.5382  
## 
## Coefficients:
##              Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.262722   0.846711  -1.491    0.136
## Tint_Gill   -0.012517   0.008058  -1.553    0.120
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 42.943  on 30  degrees of freedom
## Residual deviance: 40.098  on 29  degrees of freedom
## AIC: 44.098
## 
## Number of Fisher Scoring iterations: 4
ggplot(site, aes(Tint_Gill, Site_Location)) +
  geom_point() +
  geom_smooth(method="glm", method.args=list(family="binomial"(link="logit"))) +
  xlab ("Tint") +
  ylab ("Site") +
  labs (title="Raw Fit:  0=Rotary Park, 1=Dry Fork")
## `geom_smooth()` using formula 'y ~ x'

boxplot(Tint_Gill~Season_Collected, data=season)

tint_t <- t.test(Tint_Gill ~ Season_Collected, data = season, var.equal = TRUE)

tint_t
## 
##  Two Sample t-test
## 
## data:  Tint_Gill by Season_Collected
## t = 1.7868, df = 29, p-value = 0.08442
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -4.805333 71.266083
## sample estimates:
## mean in group 0 mean in group 1 
##        -79.3440       -112.5744
boxplot(Sat_Gill~Season_Collected, data=season)

sat_t <- t.test(Sat_Gill ~ Season_Collected, data = season, var.equal = TRUE)

sat_t
## 
##  Two Sample t-test
## 
## data:  Sat_Gill by Season_Collected
## t = -1.9264, df = 29, p-value = 0.06391
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -0.70572499  0.02111665
## sample estimates:
## mean in group 0 mean in group 1 
##       0.7951333       1.1374375
boxplot(Chroma_Gill~Season_Collected, data=season)

chroma_t <- t.test(Chroma_Gill ~ Season_Collected, data = season, var.equal = TRUE)

chroma_t
## 
##  Two Sample t-test
## 
## data:  Chroma_Gill by Season_Collected
## t = -3.0848, df = 29, p-value = 0.004445
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -21.773597  -4.412453
## sample estimates:
## mean in group 0 mean in group 1 
##        16.88860        29.98163
boxplot(Whiteness_Gill~Season_Collected, data=season)

whiteness_t <- t.test(Whiteness_Gill ~ Season_Collected, data = season, var.equal = TRUE)

whiteness_t
## 
##  Two Sample t-test
## 
## data:  Whiteness_Gill by Season_Collected
## t = 2.3544, df = 29, p-value = 0.02554
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##   5.75714 81.93036
## sample estimates:
## mean in group 0 mean in group 1 
##        -98.4000       -142.2438
boxplot(Red_Coloration_Area~Season_Collected, data=season)

area_t <- t.test(Red_Coloration_Area ~ Season_Collected, data = season, var.equal = TRUE)

area_t
## 
##  Two Sample t-test
## 
## data:  Red_Coloration_Area by Season_Collected
## t = 0.15244, df = 29, p-value = 0.8799
## alternative hypothesis: true difference in means between group 0 and group 1 is not equal to 0
## 95 percent confidence interval:
##  -5.659793  6.571409
## sample estimates:
## mean in group 0 mean in group 1 
##        13.37125        12.91545